#include "stdinc.h"
#include "newhash.h"
#include "extfunc.h"
#include "extvab.h"

static char src_rc_seq[1024];
extern long long single_count;
extern long long single_map;
void readseq1by1(char *src_seq, char *src_name, int *len_seq, FILE *fp, long long num_seq)
{
  int	i, k, n, strL;
  char	c;
  char	str[5000];

  n = 0;
  k = num_seq;

  while(fgets(str, 4950, fp))
    {
      if(str[0] == '#')	continue;

      if(str[0] == '>')
        {
          /*
          if(k >= 0)	{  // if this isn't the first '>' in the file
          	*len_seq = n;
          }
          */
          *len_seq = n;
          n = 0;
          sscanf(&str[1], "%s", src_name);
          return;
        }
      else
        {
          strL = strlen(str);

          if(strL + n > maxReadLen)
            strL = maxReadLen - n;

          for(i = 0; i < strL; i ++)
            {
              if(str[i] >= 'a' && str[i] <= 'z')
                {
                  c = base2int(str[i] - 'a' + 'A');
                  src_seq[n ++] = c;
                }
              else if(str[i] >= 'A' && str[i] <= 'Z')
                {
                  c = base2int(str[i]);
                  src_seq[n ++] = c;
                  // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
                }
              else if(str[i] == '.')
                {
                  c = base2int('A');
                  src_seq[n ++] = c;
                }          // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
            }

          //printf("%d: %d\n",k,n);
        }
    }

  if(k >= 0)
    {
      *len_seq = n;
      return;
    }

  *len_seq = 0;
}


void read_one_sequence(FILE *fp, long long *T, char **X)

{

  char *fasta, *src_name; //point to fasta array
  int num_seq, len, name_len, min_len;

  num_seq = readseqpar(&len, &min_len, &name_len, fp);

  if(num_seq < 1)
    {
      printf("no fasta sequence in file\n");
      *T = 0;
      return;
    }

  fasta = (char *)ckalloc(len * sizeof(char));
  src_name = (char *)ckalloc((name_len + 1) * sizeof(char));
  rewind(fp);

  readseq1by1(fasta, src_name, &len, fp, -1);
  readseq1by1(fasta, src_name, &len, fp, 0);

  *X = fasta;
  *T = len;
  free((void *)src_name);
}

long long multiFileParse(int *max_leg, int *min_leg, int *max_name_leg, FILE *fp)
{

  char str[5000];
  FILE *freads;
  int slen;
  long long counter = 0;
  *max_name_leg = *max_leg = 1;
  *min_leg = 1000;

  while(fgets(str, 4950, fp))
    {
      slen = strlen(str);
      str[slen - 1] = str[slen];
      freads = ckopen(str, "r");
      counter += readseqpar(max_leg, min_leg, max_name_leg, freads);
      fclose(freads);
    }

  return counter;
}

long long readseqpar(int *max_leg, int *min_leg, int *max_name_leg, FILE *fp)
{
  int	l, n;
  long long k;
  char	str[5000], src_name[5000];


  n = 0;
  k = -1;

  while(fgets(str, 4950, fp))
    {
      if(str[0] == '>')
        {
          if(k >= 0)
            {
              if(n > *max_leg)
                *max_leg = n;

              if(n < *min_leg)
                *min_leg = n;

            }

          n = 0;
          k ++;
          sscanf(&str[1], "%s", src_name);

          if((l = strlen(src_name)) > *max_name_leg)
            * max_name_leg = l;
        }
      else
        {
          n += strlen(str) - 1;
        }
    }

  if(n > *max_leg)
    *max_leg = n;

  if(n < *min_leg)
    *min_leg = n;

  k ++;
  return(k);
}

void read1seqfq(char *src_seq, char *src_name, int *len_seq, FILE *fp)
{
  int	i, n, strL;
  char	c;
  char	str[5000];
  boolean flag = 0;

  while(fgets(str, 4950, fp))
    {
      if(str[0] == '@')
        {
          flag = 1;
          sscanf(&str[1], "%s", src_name);
          break;
        }
    }

  if(!flag)    //last time reading fq file get this
    {
      *len_seq = 0;
      return;
    }

  n = 0;

  while(fgets(str, 4950, fp))
    {
      if(str[0] == '+')
        {
          fgets(str, 4950, fp); // pass quality value line
          *len_seq = n;
          return;
        }
      else
        {
          strL = strlen(str);

          if(strL + n > maxReadLen)
            strL = maxReadLen - n;

          for(i = 0; i < strL; i ++)
            {
              if(str[i] >= 'a' && str[i] <= 'z')
                {
                  c = base2int(str[i] - 'a' + 'A');
                  src_seq[n ++] = c;
                }
              else if(str[i] >= 'A' && str[i] <= 'Z')
                {
                  c = base2int(str[i]);
                  src_seq[n ++] = c;
                  // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
                }
              else if(str[i] == '.')
                {
                  c = base2int('A');
                  src_seq[n ++] = c;
                }          // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
            }

          //printf("%d: %d\n",k,n);
        }
    }

  *len_seq = n;
  return;
}

// find the next file to open in libs
static int nextValidIndex(int libNo, boolean pair, unsigned char asm_ctg)
{
  int i = libNo;

  while(i < num_libs)
    {
      if(asm_ctg == 1 && (lib_array[i].asm_flag != 1 && lib_array[i].asm_flag != 3))
        {
          i++;
          continue;
        }
      else if(asm_ctg == 0 && (lib_array[i].asm_flag != 2 && lib_array[i].asm_flag != 3))
        {
          i++;
          continue;
        }
      else if(asm_ctg > 1 && lib_array[i].asm_flag != asm_ctg) // reads for other purpose
        {
          i++;
          continue;
        }

      if(lib_array[i].curr_type == 1 &&
          lib_array[i].curr_index < lib_array[i].num_a1_file)
        return i;

      if(lib_array[i].curr_type == 2 &&
          lib_array[i].curr_index < lib_array[i].num_q1_file)
        return i;

      if(lib_array[i].curr_type == 3 &&
          lib_array[i].curr_index < lib_array[i].num_p_file)
        return i;

      if(pair)
        {
          if(lib_array[i].curr_type < 3)
            {
              lib_array[i].curr_type++;
              lib_array[i].curr_index = 0;
            }
          else
            i++;

          continue;
        }

      if(lib_array[i].curr_type == 4 &&
          lib_array[i].curr_index < lib_array[i].num_s_a_file)
        return i;

      if(lib_array[i].curr_type == 5 &&
          lib_array[i].curr_index < lib_array[i].num_s_q_file)
        return i;

      if(lib_array[i].curr_type < 5)
        {
          lib_array[i].curr_type++;
          lib_array[i].curr_index = 0;
        }
      else
        i++;
    }//for each lib

  return i;
}

static FILE *openFile4read(char *fname)
{
  FILE *fp;

  if(strlen(fname) > 3 && strcmp(fname + strlen(fname) - 3, ".gz") == 0)
    {
      char *cmd = (char *)ckalloc((strlen(fname) + 20) * sizeof(char));
      sprintf(cmd, "gzip -dc %s", fname);
      fp = popen(cmd, "r");
      free(cmd);
      return fp;
    }
  else
    {
      return ckopen(fname, "r");
    }

}

void openFileInLib(int libNo)
{
  int i = libNo;

  if(lib_array[i].curr_type == 1)
    {
      printf("[%s]opened file:\n %s\n",
             __FUNCTION__, lib_array[i].a1_fname[lib_array[i].curr_index]);
      printf("[%s]opened file:\n %s\n",
             __FUNCTION__, lib_array[i].a2_fname[lib_array[i].curr_index]);
      lib_array[i].fp1 = openFile4read(lib_array[i].a1_fname[lib_array[i].curr_index]);
      lib_array[i].fp2 = openFile4read(lib_array[i].a2_fname[lib_array[i].curr_index]);
      lib_array[i].curr_index++;
      lib_array[i].paired = 1;
    }
  else if(lib_array[i].curr_type == 2)
    {
      printf("[%s]opened file:\n %s\n",
             __FUNCTION__, lib_array[i].q1_fname[lib_array[i].curr_index]);
      printf("[%s]opened file:\n %s\n",
             __FUNCTION__, lib_array[i].q2_fname[lib_array[i].curr_index]);
      lib_array[i].fp1 = openFile4read(lib_array[i].q1_fname[lib_array[i].curr_index]);
      lib_array[i].fp2 = openFile4read(lib_array[i].q2_fname[lib_array[i].curr_index]);
      lib_array[i].curr_index++;
      lib_array[i].paired = 1;
    }
  else if(lib_array[i].curr_type == 3)
    {
      printf("[%s]opened file:\n %s\n",
             lib_array[i].p_fname[lib_array[i].curr_index]);
      lib_array[i].fp1 = openFile4read(lib_array[i].p_fname[lib_array[i].curr_index]);
      lib_array[i].curr_index++;
      lib_array[i].paired = 0;
    }
  else if(lib_array[i].curr_type == 4)
    {
      printf("[%s]opened file:\n %s\n",
             __FUNCTION__, lib_array[i].s_a_fname[lib_array[i].curr_index]);
      lib_array[i].fp1 = openFile4read(lib_array[i].s_a_fname[lib_array[i].curr_index]);
      lib_array[i].curr_index++;
      lib_array[i].paired = 0;
    }
  else if(lib_array[i].curr_type == 5)
    {
      printf("[%s]opened file:\n %s\n",
             __FUNCTION__, lib_array[i].s_q_fname[lib_array[i].curr_index]);
      lib_array[i].fp1 = openFile4read(lib_array[i].s_q_fname[lib_array[i].curr_index]);
      lib_array[i].curr_index++;
      lib_array[i].paired = 0;
    }

}

static void reverse2k(char *src_seq, int len_seq)
{
  if(!len_seq)
    return;

  int i;
  reverseComplementSeq(src_seq, len_seq, src_rc_seq);

  for(i = 0; i < len_seq; i++)
    src_seq[i] = src_rc_seq[i];
}

static void closeFp1InLab(int libNo)
{
  int ftype = lib_array[libNo].curr_type;
  int index = lib_array[libNo].curr_index - 1;
  char *fname;

  if(ftype == 1)
    fname = lib_array[libNo].a1_fname[index];
  else if(ftype == 2)
    fname = lib_array[libNo].q1_fname[index];
  else if(ftype == 3)
    fname = lib_array[libNo].p_fname[index];
  else if(ftype == 4)
    fname = lib_array[libNo].s_a_fname[index];
  else if(ftype == 5)
    fname = lib_array[libNo].s_q_fname[index];
  else
    return;

  if(strlen(fname) > 3 && strcmp(fname + strlen(fname) - 3, ".gz") == 0)
    pclose(lib_array[libNo].fp1);
  else
    fclose(lib_array[libNo].fp1);
}

static void closeFp2InLab(int libNo)
{
  int ftype = lib_array[libNo].curr_type;
  int index = lib_array[libNo].curr_index - 1;
  char *fname;

  if(ftype == 1)
    fname = lib_array[libNo].a2_fname[index];
  else if(ftype == 2)
    fname = lib_array[libNo].q2_fname[index];
  else
    return;

  if(strlen(fname) > 3 && strcmp(fname + strlen(fname) - 3, ".gz") == 0)
    pclose(lib_array[libNo].fp2);
  else
    fclose(lib_array[libNo].fp2);
}

boolean read1seqInLib(char *src_seq, char *src_name, int *len_seq, int *libNo, boolean pair, unsigned char asm_ctg)
{
  int i = *libNo;
  int prevLib = i;

  if(!lib_array[i].fp1  // file1 does not exist
      || (lib_array[i].curr_type != 1 && feof(lib_array[i].fp1)) // file1 reaches end and not type1
      || (lib_array[i].curr_type == 1 && feof(lib_array[i].fp1) && feof(lib_array[i].fp2))) //f1&f2 reaches end
    {
      if(lib_array[i].fp1 && feof(lib_array[i].fp1))
        {
          closeFp1InLab(i);
          //printf("[%s]%d reads in current file , (%.1f) map-rate .\n",__FUNCTION__,single_count,single_map/single_count);
          single_count = single_map = 0;
        }

      if(lib_array[i].fp2 && feof(lib_array[i].fp2))
        {
          closeFp2InLab(i);
          //printf("[%s]%d reads in current file , (%.1f) map-rate .\n",__FUNCTION__,single_count,single_map/single_count);
          single_count = single_map = 0;
        }

      *libNo = nextValidIndex(i, pair, asm_ctg);
      i = *libNo;

      if(lib_array[i].rd_len_cutoff > 0)
        maxReadLen = lib_array[i].rd_len_cutoff < maxReadLen4all ?
                     lib_array[i].rd_len_cutoff : maxReadLen4all;
      else
        maxReadLen = maxReadLen4all;

      //record insert size info
      //printf("from lib %d to %d, read %lld to %ld\n",prevLib,i,readNumBack,n_solexa);
      if(pair && i != prevLib)
        {
          if(readNumBack < n_solexa)
            {
              pes[gradsCounter].PE_bound = n_solexa;
              pes[gradsCounter].rank = lib_array[prevLib].rank;
              pes[gradsCounter].pair_num_cut = lib_array[prevLib].pair_num_cut;
              pes[gradsCounter++].insertS = lib_array[prevLib].avg_ins;
              readNumBack = n_solexa;
            }
        }

      if(i >= num_libs)
        return 0;

      openFileInLib(i);

      if(lib_array[i].curr_type == 1)
        {
          readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, -1);
          readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp2, -1);
        }
      else if(lib_array[i].curr_type == 3 || lib_array[i].curr_type == 4)
        readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, -1);

    }

  if(lib_array[i].curr_type == 1)
    {
      if(lib_array[i].paired == 1)
        {
          readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, 1);

          if(lib_array[i].reverse)
            reverse2k(src_seq, *len_seq);

          lib_array[i].paired = 2;

          if(*len_seq > 0 || !feof(lib_array[i].fp1))
            {
              n_solexa++;
              return 1;
            }
          else
            return read1seqInLib(src_seq, src_name, len_seq, libNo, pair, asm_ctg);
        }
      else
        {
          readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp2, 1);

          if(lib_array[i].reverse)
            reverse2k(src_seq, *len_seq);

          lib_array[i].paired = 1;
          n_solexa++;
          return 1; //can't fail to read a read2
        }
    }

  if(lib_array[i].curr_type == 2)
    {
      if(lib_array[i].paired == 1)
        {
          read1seqfq(src_seq, src_name, len_seq, lib_array[i].fp1);

          /*
          if(*len_seq>0){
          	for(j=0;j<*len_seq;j++)
          		printf("%c",int2base(src_seq[j]));
          	printf("\n");
          }
          */
          if(lib_array[i].reverse)
            reverse2k(src_seq, *len_seq);

          lib_array[i].paired = 2;

          if(*len_seq > 0 || !feof(lib_array[i].fp1))
            {
              n_solexa++;
              return 1;
            }
          else
            return read1seqInLib(src_seq, src_name, len_seq, libNo, pair, asm_ctg);
        }
      else
        {
          read1seqfq(src_seq, src_name, len_seq, lib_array[i].fp2);

          if(lib_array[i].reverse)
            reverse2k(src_seq, *len_seq);

          lib_array[i].paired = 1;
          n_solexa++;
          return 1; //can't fail to read a read2
        }
    }

  if(lib_array[i].curr_type == 5)
    read1seqfq(src_seq, src_name, len_seq, lib_array[i].fp1);
  else
    {
      readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, 1);
    }

  /*
  int t;
  for(t=0;t<*len_seq;t++)
  	printf("%d",src_seq[t]);
  printf("\n");
  */
  if(lib_array[i].reverse)
    reverse2k(src_seq, *len_seq);

  if(*len_seq > 0 || !feof(lib_array[i].fp1))
    {
      n_solexa++;
      return 1;
    }
  else
    return read1seqInLib(src_seq, src_name, len_seq, libNo, pair, asm_ctg);
}
